# Exercise: sketch the hyperplanes 1 + 3*X1 - X2 = 0 and -2 + X1 + 2*X2 = 0,
# then a circular boundary, and label the sign of each expression per region.
x1 <- seq(-10, 10)
x2_1 <- 1 + 3 * x1           # boundary (a): X2 = 1 + 3*X1
x2_2 <- 1 - ((1 / 2) * x1)   # boundary (b): X2 = 1 - X1/2, i.e. -2 + X1 + 2*X2 = 0
matplot(x = x1, y = cbind(x2_1, x2_2), type = "l", ylab = "X2")
text(x = -3, y = 20, labels = "(a) 1+3X1 −X2 < 0")
text(x = 0, y = -20, labels = "(a) 1+3X1 −X2 > 0")
text(x = -7, y = -9, labels = "(b) −2 + X1 + 2X2 < 0", col = "red")
text(x = 6, y = 5, labels = "(b) −2 + X1 + 2X2 > 0", col = "red")
# Non-linear boundary: circle (1 + X1)^2 + (2 - X2)^2 = 4, centre (-1, 2), radius 2.
plot(NA, NA, type = "n", xlim = c(-5, 3), ylim = c(-1, 5), xlab = "X1", ylab = "X2", asp = 1)
# Use TRUE/FALSE rather than the reassignable shorthands T/F.
symbols(c(-1), c(2), circles = c(2), add = TRUE, inches = FALSE)
# Evaluate the circular boundary (1 + X1)^2 + (2 - X2)^2 = 4 at (x1, x2):
# negative inside the circle, zero on it, positive outside. Vectorized.
predF <- function(x1, x2) {
  dx <- 1 + x1
  dy <- 2 - x2
  dx^2 + dy^2 - 4
}
# Evaluate the circle function at its centre and at two boundary points.
predF(-1,2)
## [1] -4
predF(-4,2)
## [1] 5
predF(-1,5)
## [1] 5
# Label interior/exterior of the circle on the current plot.
text(x=-1,y=2,labels="< 4")
text(x=-4,y=2,labels="> 4")
# Classify sample points: positive -> outside ("Blue"), else inside ("Red").
# The conditions are scalar, so plain if/else is preferred over ifelse().
if (predF(0,0) > 0) "Blue" else "Red"
## [1] "Blue"
if (predF(-1,1) > 0) "Blue" else "Red"
## [1] "Red"
if (predF(2,2) > 0) "Blue" else "Red"
## [1] "Blue"
if (predF(3,8) > 0) "Blue" else "Red"
## [1] "Blue"
The decision boundary can be rewritten as a linear function of the parameters, beta, by treating X1^2 and X2^2 as additional features. So, if we pre-compute/evaluate those features ahead of time, the non-linearity is no longer part of the function being fit, and the fitted function is a linear additive function of the features.
# Toy example: 7 labelled points in 2D with a separating hyperplane.
x1 = c(3,2,4,1,2,4,4)
x2 = c(4,2,4,4,1,3,1)
y = c("Red","Red","Red","Red","Blue","Blue","Blue")
plot(x1,x2,col=y)
# Separating line x2 = x1 - 0.5, i.e. 0 = x1 - x2 - 0.5.
abline(-.5,1)
text(x=3,y=2.25,labels="0=x1-x2-0.5")
# Dashed lines parallel to the boundary, offset by the margin.
abline(0,1,lty=2)
abline(-1,1,lty=2)
# Cross-mark four points sitting on the dashed margin lines
# (presumably the support vectors — verify against the exercise text).
points(2,1,pch=4,col="blue",cex=2)
points(2,2,pch=4,col="red",cex=2)
points(4,3,pch=4,col="blue",cex=2)
points(4,4,pch=4,col="red",cex=2)
# An alternative (non-optimal) separating hyperplane, in green.
abline(-.5,1.1,col="green",lty=2)
text(x=3,y=3,labels="0=1.1x1-x2-0.5",col="green")
# A blue point on the red side of x2 = x1 - 0.5, which would make the
# two classes no longer linearly separable.
points(x=2,y=2.5,col="blue")
library(e1071)
# Simulate two overlapping Gaussian classes and compare SVM kernels.
set.seed(1)
x = matrix(rnorm(100*2),ncol=2)
x[1:50,]=x[1:50,]-1
x[51:100,]=x[51:100,]+.5
y = c(rep(1,50),rep(2,50))
plot(x,col=y)
dat = data.frame(x=x,y=as.factor(y))
# 50/50 train/test split.
train = sample(100,50)
dat.train = dat[train,]
dat.test = dat[-train,]
# Linear kernel with a very large cost (few margin violations tolerated).
fitsvm.linear = svm(y~.,data=dat.train,kernel="linear",cost=10000)
plot(fitsvm.linear,dat.train)
table(pred=fitsvm.linear$fitted,y=dat.train$y)
## y
## pred 1 2
## 1 21 1
## 2 1 27
mean(fitsvm.linear$fitted != dat.train$y)
## [1] 0.04
# Degree-3 polynomial kernel: zero training error below.
fitsvm.poly = svm(y~.,data=dat.train,kernel="polynomial",degree=3,cost=10000)
plot(fitsvm.poly,dat.train)
mean(fitsvm.poly$fitted != dat.train$y)
## [1] 0
# Radial kernel: also zero training error at this cost/gamma.
fitsvm.rad = svm(y~.,data=dat.train,kernel="radial",gamma=1,cost=10000)
plot(fitsvm.rad,dat.train)
mean(fitsvm.rad$fitted != dat.train$y)
## [1] 0
# Held-out test errors: the flexible kernels generalize worse than linear here.
pred.linear = predict(fitsvm.linear,dat.test)
pred.poly = predict(fitsvm.poly,dat.test)
pred.rad = predict(fitsvm.rad,dat.test)
mean(pred.linear != dat.test$y)
## [1] 0.18
mean(pred.poly != dat.test$y)
## [1] 0.24
mean(pred.rad != dat.test$y)
## [1] 0.28
# Data with a non-linear class boundary: y = 1 exactly when x1^2 > x2^2.
set.seed(1)
x1=runif(500)-0.5
x2=runif(500)-0.5
y=1*(x1^2-x2^2 > 0)
plot(x1,x2,col=(1+y))
dat = data.frame(x1=x1,x2=x2,y=as.factor(y))
# Logistic regression with only linear terms: no coefficient is significant
# (see summary below), since a linear boundary cannot capture x1^2 = x2^2.
fit.glm = glm(y~.,data=dat,family="binomial")
summary(fit.glm)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
# Check the factor coding: level "1" is the positive class.
contrasts(dat$y)
## 1
## 0 0
## 1 1
# Class predictions from the linear fit (threshold fitted probability at 0.5).
fit.glm.pred = ifelse(fit.glm$fitted.values >.5,1,0)
plot(dat$x1,dat$x2,col=(fit.glm.pred+1))
# Logistic regression with quadratic features.
# NOTE: in a formula, (x1*x2) expands to x1 + x2 + x1:x2. The poly() terms
# already span the linear x1 and x2 columns, so the main effects are aliased
# and come back NA ("2 not defined because of singularities" below).
# Using x1:x2 or I(x1*x2) would add only the interaction term.
fit.glm.nonl = glm(y~poly(x1,2)+poly(x2,2)+(x1*x2),data=dat,family="binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# The warnings indicate (near-)perfect separation: fitted probabilities
# are numerically 0 or 1, and coefficients/standard errors blow up.
summary(fit.glm.nonl)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + (x1 * x2), family = "binomial",
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.240e-04 -2.000e-08 -2.000e-08 2.000e-08 1.163e-03
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.2 4302.0 -0.024 0.981
## poly(x1, 2)1 2715.3 141109.5 0.019 0.985
## poly(x1, 2)2 27218.5 842987.2 0.032 0.974
## poly(x2, 2)1 -279.7 97160.4 -0.003 0.998
## poly(x2, 2)2 -28693.0 875451.3 -0.033 0.974
## x1 NA NA NA NA
## x2 NA NA NA NA
## x1:x2 -206.4 41802.8 -0.005 0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.5810e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
# The quadratic model's class predictions recover the non-linear boundary.
fit.glm.nonl.pred = ifelse(fit.glm.nonl$fitted.values > .5,1,0)
plot(dat$x1,dat$x2,col=(fit.glm.nonl.pred+1))
# Linear-kernel SVM on the same quadratic-boundary data: note it needs
# 487 of the 500 points as support vectors (summary below), reflecting
# that no linear boundary fits this data.
svm.fit.linear = svm(y~.,data=dat,kernel="linear",cost=10)
summary(svm.fit.linear)
##
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 10
## gamma: 0.5
##
## Number of Support Vectors: 487
##
## ( 239 248 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
plot(svm.fit.linear,dat)
# Plot predicted classes (factor levels map to the default palette colors).
svm.pred.linear = svm.fit.linear$fitted
plot(dat$x1,dat$x2,col=(svm.pred.linear))
# Radial kernel: its fitted classes follow the non-linear boundary.
svm.fit.rad = svm(y~., data=dat, kernel="radial",cost=10,gamma=1)
svm.pred.rad = svm.fit.rad$fitted
plot(dat$x1,dat$x2,col=svm.pred.rad)
set.seed(1)
#x = matrix(c(rnorm(50,3),rnorm(50),rnorm(50,3.5),rnorm(50)),ncol=2)
# Simulate two classes that are just barely linearly separable:
# class 1 has its second coordinate shifted down by 1.
x <- matrix(runif(1000*2), ncol = 2)
x[1:500, 2] <- x[1:500, 2] - 1
# NOTE(review): the original line `x[501:1000,]=x[501:1000,]` was a no-op
# and has been removed; rows 501-1000 keep their simulated values.
y <- c(rep(1, 500), rep(2, 500))
plot(x, col = y)
dat <- data.frame(x = x, y = as.factor(y))
cost <- c(1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5)
# 10-fold cross-validation over the cost grid for a linear-kernel SVM.
tune.out = tune(svm, y~., data=dat, kernel="linear",
ranges=list(cost=cost))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10000
##
## - best performance: 0
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-01 0.003 0.004830459
## 2 1e+00 0.003 0.004830459
## 3 1e+01 0.003 0.004830459
## 4 1e+02 0.002 0.004216370
## 5 1e+03 0.002 0.004216370
## 6 1e+04 0.000 0.000000000
## 7 1e+05 0.000 0.000000000
# Collect CV errors (misclass counts = error rate * n, n = 1000), then
# refit at each cost on the full data to record training error.
cvCostErrors <- data.frame(cost = cost, error = tune.out$performances$error,
                           misclass = tune.out$performances$error * 1000, src = "cv")
trainCostErrors <- data.frame(cost = cost, error = rep(0, length(cost)),
                              misclass = rep(0, length(cost)), src = "train")
for (i in seq_along(cost)) {   # seq_along() is safer than 1:length()
  cst <- cost[i]               # renamed from `c`, which shadowed base::c
  fitsvm <- svm(y ~ ., data = dat, kernel = "linear", cost = cst)
  predsvm <- fitsvm$fitted
  trainCostErrors[i, "misclass"] <- sum(predsvm != dat$y)
  trainCostErrors[i, "error"] <- mean(predsvm != dat$y)
}
#x = matrix(c(rnorm(50,3),rnorm(50),rnorm(50,3.5),rnorm(50)),ncol=2)
# Independent test set drawn from the same distribution as the training data.
xtest <- matrix(runif(1000*2), ncol = 2)
xtest[1:500, 2] <- xtest[1:500, 2] - 1
# NOTE(review): the original line `xtest[501:1000,]=xtest[501:1000,]` was a
# no-op and has been removed; rows 501-1000 keep their simulated values.
ytest <- c(rep(1, 500), rep(2, 500))
plot(xtest, col = ytest)
datTest <- data.frame(x = xtest, y = as.factor(ytest))
cost <- c(1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5)
testCostErrors <- data.frame(cost = cost, error = rep(0, length(cost)),
                             misclass = rep(0, length(cost)), src = "test")
# Fit on the training data at each cost and score on the held-out test set.
for (i in seq_along(cost)) {   # seq_along() is safer than 1:length()
  cst <- cost[i]               # renamed from `c`, which shadowed base::c
  fitsvm <- svm(y ~ ., data = dat, kernel = "linear", cost = cst)
  predsvm <- predict(fitsvm, newdata = datTest)
  testCostErrors[i, "misclass"] <- sum(predsvm != datTest$y)
  testCostErrors[i, "error"] <- mean(predsvm != datTest$y)
}
# Overlay misclassification counts: train (black), CV (red), test (green).
# (A log-scaled x axis, log = "x", would spread the cost grid more evenly.)
costErrors <- rbind(cvCostErrors, testCostErrors, trainCostErrors)
plot(x = cost, y = costErrors[costErrors$src == "train", "misclass"],
     type = "l", xlab = "cost", ylab = "misclass", ylim = c(0, 10))
points(x = cost, y = costErrors[costErrors$src == "cv", "misclass"], type = "l", col = 2)
points(x = cost, y = costErrors[costErrors$src == "test", "misclass"], type = "l", col = 3)
In my situation, cross validation and training error seemed to agree with one another that you should increase the cost to around 10,000 to get the best fit with 0 training errors. However, the test set showed that the lower cost of 1 performed the best, with only 4 test errors.
library(ISLR)
library(e1071)
data(Auto)
# Binary response: 1 if a car's mpg exceeds the median mpg, else 0.
Auto$mpgHigh = as.factor(ifelse(Auto$mpg > median(Auto$mpg), 1, 0))
set.seed(42)
# Cross-validate cost for a linear kernel. mpg itself is excluded from the
# predictors (~.-mpg) because mpgHigh is derived directly from it.
tune.out = tune(svm, mpgHigh~.-mpg, data=Auto, kernel="linear",
ranges=list(cost=c(1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.08685897
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.08923077 0.04999616
## 2 1e-01 0.09423077 0.06011835
## 3 1e+00 0.08685897 0.04239744
## 4 1e+01 0.09955128 0.04258955
## 5 1e+02 0.11743590 0.04404592
## 6 1e+03 0.10480769 0.05218686
## 7 1e+04 0.10730769 0.04678970
## 8 1e+05 0.10730769 0.04678970
It looks like the cost of 1 yielded the best performance using cross validation. (c) Now repeat (b), this time using SVMs with radial and polynomial basis kernels, with different values of gamma and degree and cost. Comment on your results.
# Polynomial-kernel SVM: cross-validate jointly over cost and degree.
# Best result below: cost = 1e5, degree = 3, CV error ~0.102.
tune.out.poly = tune(svm, mpgHigh~.-mpg, data=Auto, kernel="polynomial",
ranges=list(cost=c(1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5),
degree=c(2,3,4,5)))
summary(tune.out.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 1e+05 3
##
## - best performance: 0.1021795
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-02 2 0.5434615 0.03101869
## 2 1e-01 2 0.5434615 0.03101869
## 3 1e+00 2 0.5434615 0.03101869
## 4 1e+01 2 0.5436538 0.06825206
## 5 1e+02 2 0.3088462 0.05000621
## 6 1e+03 2 0.2781410 0.04619187
## 7 1e+04 2 0.1608333 0.06617894
## 8 1e+05 2 0.1914744 0.06973807
## 9 1e-02 3 0.5434615 0.03101869
## 10 1e-01 3 0.5434615 0.03101869
## 11 1e+00 3 0.5434615 0.03101869
## 12 1e+01 3 0.5434615 0.03101869
## 13 1e+02 3 0.3880128 0.12149162
## 14 1e+03 3 0.2604487 0.05726819
## 15 1e+04 3 0.1685256 0.05971483
## 16 1e+05 3 0.1021795 0.04036958
## 17 1e-02 4 0.5434615 0.03101869
## 18 1e-01 4 0.5434615 0.03101869
## 19 1e+00 4 0.5434615 0.03101869
## 20 1e+01 4 0.5434615 0.03101869
## 21 1e+02 4 0.5434615 0.03101869
## 22 1e+03 4 0.5434615 0.03101869
## 23 1e+04 4 0.4390385 0.07173917
## 24 1e+05 4 0.3370513 0.06165839
## 25 1e-02 5 0.5434615 0.03101869
## 26 1e-01 5 0.5434615 0.03101869
## 27 1e+00 5 0.5434615 0.03101869
## 28 1e+01 5 0.5434615 0.03101869
## 29 1e+02 5 0.5434615 0.03101869
## 30 1e+03 5 0.5434615 0.03101869
## 31 1e+04 5 0.5434615 0.03101869
## 32 1e+05 5 0.4313462 0.07316837
# Radial-kernel SVM: cross-validate jointly over cost and gamma.
# Best result below: cost = 1, gamma = 0.5, CV error ~0.082.
tune.out.rad = tune(svm, mpgHigh~.-mpg, data=Auto, kernel="radial",
ranges=list(cost=c(1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5),
gamma=c(.1,.5,1,2,3,5)))
summary(tune.out.rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.5
##
## - best performance: 0.08166667
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.1 0.22967949 0.09904027
## 2 1e-01 0.1 0.08679487 0.04021093
## 3 1e+00 0.1 0.08679487 0.04021093
## 4 1e+01 0.1 0.09448718 0.03648306
## 5 1e+02 0.1 0.13019231 0.05744045
## 6 1e+03 0.1 0.12256410 0.05116600
## 7 1e+04 0.1 0.12256410 0.05116600
## 8 1e+05 0.1 0.12256410 0.05116600
## 9 1e-02 0.5 0.54083333 0.03353278
## 10 1e-01 0.5 0.08935897 0.04032884
## 11 1e+00 0.5 0.08166667 0.04463634
## 12 1e+01 0.5 0.09455128 0.03438412
## 13 1e+02 0.5 0.09967949 0.03527551
## 14 1e+03 0.5 0.09967949 0.03527551
## 15 1e+04 0.5 0.09967949 0.03527551
## 16 1e+05 0.5 0.09967949 0.03527551
## 17 1e-02 1.0 0.54083333 0.03353278
## 18 1e-01 1.0 0.54083333 0.03353278
## 19 1e+00 1.0 0.08423077 0.03991005
## 20 1e+01 1.0 0.08685897 0.03667161
## 21 1e+02 1.0 0.08685897 0.03667161
## 22 1e+03 1.0 0.08685897 0.03667161
## 23 1e+04 1.0 0.08685897 0.03667161
## 24 1e+05 1.0 0.08685897 0.03667161
## 25 1e-02 2.0 0.54083333 0.03353278
## 26 1e-01 2.0 0.54083333 0.03353278
## 27 1e+00 2.0 0.12006410 0.03676959
## 28 1e+01 2.0 0.11743590 0.03272065
## 29 1e+02 2.0 0.11743590 0.03272065
## 30 1e+03 2.0 0.11743590 0.03272065
## 31 1e+04 2.0 0.11743590 0.03272065
## 32 1e+05 2.0 0.11743590 0.03272065
## 33 1e-02 3.0 0.54083333 0.03353278
## 34 1e-01 3.0 0.54083333 0.03353278
## 35 1e+00 3.0 0.36961538 0.13200605
## 36 1e+01 3.0 0.33634615 0.11091454
## 37 1e+02 3.0 0.33634615 0.11091454
## 38 1e+03 3.0 0.33634615 0.11091454
## 39 1e+04 3.0 0.33634615 0.11091454
## 40 1e+05 3.0 0.33634615 0.11091454
## 41 1e-02 5.0 0.54083333 0.03353278
## 42 1e-01 5.0 0.54083333 0.03353278
## 43 1e+00 5.0 0.47698718 0.04089684
## 44 1e+01 5.0 0.47698718 0.03513181
## 45 1e+02 5.0 0.47698718 0.03513181
## 46 1e+03 5.0 0.47698718 0.03513181
## 47 1e+04 5.0 0.47698718 0.03513181
## 48 1e+05 5.0 0.47698718 0.03513181
The linear (cost = 1, error .086) and radial (gamma = .5, cost = 1, error .081) outperformed the best polynomial kernel svm (degree = 3, cost = 1e5, error .10). I’d say radial is the best choice here, because the low cost and gamma suggest that the likelihood of overfitting the data is lower than the high costs it took to achieve good cross validation performance using the other two kernel methods. (d) Make some plots to back up your assertions in (b) and (c). Hint: In the lab, we used the plot() function for svm objects only in cases with p = 2. When p > 2, you can use the plot() function to create plots displaying pairs of variables at a time. Essentially, instead of typing > plot(svmfit, dat) where svmfit contains your fitted model and dat is a data frame containing your data, you can type > plot(svmfit, dat, x1~x4) in order to plot just the first and fourth variables. However, you must replace x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
# NOTE(review): this fit uses `mpgHigh~.`, so mpg (from which mpgHigh is
# derived) and the high-cardinality `name` factor enter as predictors,
# unlike the tuning calls above which used `mpgHigh~.-mpg`. Presumably mpg
# is kept so the pairwise plots below can use it as an axis — confirm this
# leakage is intended for visualization only.
svmfit.linear = svm(mpgHigh~.,data=Auto,kernel="linear",cost=1)
# Plot an svm fit for `var` paired with every other column of dataSet,
# skipping the columns named in noPlotList (and `var` itself). Prints a
# progress line before each pairwise plot.
plotSvmPairs <- function(fit, var, noPlotList, dataSet) {
  keep <- !(names(dataSet) %in% c(noPlotList, var))
  for (other in names(dataSet)[keep]) {
    print(paste0("Plotting ", var, "~", other))
    plot(fit, dataSet, as.formula(paste0(var, "~", other)))
  }
}
# Columns never used as the paired plotting variable.
noPlotList = c("name","mpgHigh")
plotSvmPairs(fit=svmfit.linear, var="mpg", noPlotList=noPlotList, Auto)
## [1] "Plotting mpg~cylinders"
## [1] "Plotting mpg~displacement"
## [1] "Plotting mpg~horsepower"
## [1] "Plotting mpg~weight"
## [1] "Plotting mpg~acceleration"
## [1] "Plotting mpg~year"
## [1] "Plotting mpg~origin"
# Polynomial kernel at the CV-best settings found above (cost=1e5, degree=3).
svmfit.poly = svm(mpgHigh~.,data=Auto,kernel="polynomial",cost=1e5,degree=3)
plotSvmPairs(fit=svmfit.poly, var="mpg", noPlotList=noPlotList, Auto)
## [1] "Plotting mpg~cylinders"
## [1] "Plotting mpg~displacement"
## [1] "Plotting mpg~horsepower"
## [1] "Plotting mpg~weight"
## [1] "Plotting mpg~acceleration"
## [1] "Plotting mpg~year"
## [1] "Plotting mpg~origin"
# NOTE(review): cost = 100 / gamma = 0.01 differ from the CV-best radial
# settings found above (cost = 1, gamma = 0.5) — confirm this is deliberate.
svm.radial <- svm(mpgHigh ~ ., data = Auto, kernel = "radial", cost = 100, gamma = 0.01)
plotSvmPairs(fit=svm.radial, var="mpg", noPlotList=noPlotList, Auto)
## [1] "Plotting mpg~cylinders"
## [1] "Plotting mpg~displacement"
## [1] "Plotting mpg~horsepower"
## [1] "Plotting mpg~weight"
## [1] "Plotting mpg~acceleration"
## [1] "Plotting mpg~year"
## [1] "Plotting mpg~origin"
library(ISLR)
library(e1071)
data(OJ)
set.seed(42)
# 800-observation training set; the rest of OJ is held out for testing.
train = sample(nrow(OJ),800)
OJ.train = OJ[train,]
OJ.test = OJ[-train,]
# Linear support vector classifier with a small cost (wide, soft margin).
svmfit = svm(Purchase~., data=OJ.train, kernel="linear", cost=0.01)
summary(svmfit)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
## gamma: 0.05555556
##
## Number of Support Vectors: 439
##
## ( 219 220 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The fit to the training data used 439 support vectors with 219 in one class and 220 in the other. (c) What are the training and test error rates?
# Training and test misclassification rates for the cost = 0.01 linear fit.
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.16"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.19"
# Cross-validate cost on a log grid from 0.01 to 10 (13 values).
set.seed(43)
cost = 10^seq(-2,1,by=.25)
tune.out = tune(svm, Purchase~., data = OJ.train, kernel="linear", ranges=list(cost=cost))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17125 0.06347845
## 2 0.01778279 0.17125 0.06347845
## 3 0.03162278 0.17250 0.06449591
## 4 0.05623413 0.17250 0.06422616
## 5 0.10000000 0.16875 0.05781015
## 6 0.17782794 0.17125 0.06456317
## 7 0.31622777 0.17500 0.05921946
## 8 0.56234133 0.17375 0.05964304
## 9 1.00000000 0.17625 0.05964304
## 10 1.77827941 0.17375 0.06022239
## 11 3.16227766 0.17375 0.05935124
## 12 5.62341325 0.17375 0.05787019
## 13 10.00000000 0.17500 0.05892557
Best cost was 0.1. (e) Compute the training and test error rates using this new value for cost.
# Refit the linear kernel at the CV-best cost (0.1) and report errors.
svmfit = svm(Purchase~., data=OJ.train, kernel="linear", cost=0.1)
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.16"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.17"
# Radial kernel with default gamma and a very small cost: nearly every
# observation becomes a support vector (638 of 800, summary below).
svmfit = svm(Purchase~., data=OJ.train, kernel="radial", cost=0.01)
summary(svmfit)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
## gamma: 0.05555556
##
## Number of Support Vectors: 638
##
## ( 318 320 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.4"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.37"
# Cross-validate cost for the radial kernel over the same grid.
set.seed(43)
cost = 10^seq(-2,1,by=.25)
tune.out = tune(svm, Purchase~., data = OJ.train, kernel="radial", ranges=list(cost=cost))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5623413
##
## - best performance: 0.1775
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39750 0.08815233
## 2 0.01778279 0.39750 0.08815233
## 3 0.03162278 0.34625 0.11815274
## 4 0.05623413 0.20875 0.07169815
## 5 0.10000000 0.18375 0.04825065
## 6 0.17782794 0.19000 0.05027701
## 7 0.31622777 0.18250 0.04090979
## 8 0.56234133 0.17750 0.04281744
## 9 1.00000000 0.17750 0.04556741
## 10 1.77827941 0.18125 0.04259385
## 11 3.16227766 0.18500 0.04440971
## 12 5.62341325 0.18125 0.04340139
## 13 10.00000000 0.19875 0.05447030
# Refit the radial kernel at the CV-best cost and report errors.
svmfit = svm(Purchase~., data=OJ.train, kernel="radial", cost=0.5623413)
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.15"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.15"
# Degree-2 polynomial kernel, first at the default small cost of 0.01.
svmfit = svm(Purchase~., data=OJ.train, kernel="polynomial", cost=0.01, degree=2)
summary(svmfit)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "polynomial",
## cost = 0.01, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## gamma: 0.05555556
## coef.0: 0
##
## Number of Support Vectors: 642
##
## ( 318 324 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.4"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.37"
# Cross-validate cost for the degree-2 polynomial kernel.
set.seed(43)
cost = 10^seq(-2,1,by=.25)
tune.out = tune(svm, Purchase~., data = OJ.train, kernel="polynomial", degree=2, ranges=list(cost=cost))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5.623413
##
## - best performance: 0.1875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39750 0.08815233
## 2 0.01778279 0.38625 0.08527936
## 3 0.03162278 0.37375 0.08446112
## 4 0.05623413 0.34750 0.07812917
## 5 0.10000000 0.33750 0.07660323
## 6 0.17782794 0.25625 0.07270803
## 7 0.31622777 0.22250 0.05974483
## 8 0.56234133 0.22125 0.05834821
## 9 1.00000000 0.21125 0.05905800
## 10 1.77827941 0.20500 0.06043821
## 11 3.16227766 0.19250 0.04794383
## 12 5.62341325 0.18750 0.03908680
## 13 10.00000000 0.18875 0.04059026
# Refit the polynomial kernel at the CV-best cost and report errors.
svmfit = svm(Purchase~., data=OJ.train, kernel="polynomial", degree=2, cost=5.62341325)
print(paste0("Training error: ", round(mean(svmfit$fitted != OJ.train$Purchase),2)))
## [1] "Training error: 0.16"
print(paste0("Test error: ", round(mean(predict(svmfit,OJ.test) != OJ.test$Purchase),2)))
## [1] "Test error: 0.17"
Overall, the linear kernel with cost = .1, and the polynomial kernel with cost = 5.62341325 and degree 2 both underperform the radial kernel with default gamma and cost = 0.5623413. The latter two achieved 17% test error rates, while the radial kernel was able to achieve 15%.